#!/usr/bin/python
# -*- coding: UTF-8 -*-

from pyspark import SparkContext
from pyspark.streaming import StreamingContext

if __name__ == "__main__":
    sc = SparkContext(appName="HDFSFileStream")
    ssc = StreamingContext(sc, 10)

    lines = ssc.textFileStream("/bigdata/streaming")
    counts = lines.flatMap(lambda line: line.split(" ")).\
	map(lambda x: (x, 1)).reduceByKey(lambda a, b: a + b)
    counts.pprint()

    ssc.start()
    ssc.awaitTermination()